# Written with ChatGPT-5
import json

def _select_answer(choices, answer_idx):
    """Return ``choices[answer_idx]``, or ``""`` when it cannot be resolved."""
    if answer_idx is None:
        return ""
    try:
        if 0 <= answer_idx < len(choices):
            return choices[answer_idx]
    except (TypeError, KeyError):
        # Malformed record (e.g. a string answer, a null ``choices`` or a
        # non-list container): fall back to an empty output, the same way a
        # missing or out-of-range answer is handled, instead of aborting the
        # whole conversion.
        return ""
    return ""


def convert_dataset(input_file: str, output_file: str) -> None:
    """Convert an NDJSON multiple-choice dataset into a "text2text" JSON file.

    Each non-blank line of ``input_file`` is parsed as one JSON object. Its
    ``question``, ``subject`` and ``choices`` fields are merged into a single
    ``input`` string, and the choice selected by the integer ``answer`` index
    becomes ``output``. The result is written to ``output_file`` as
    ``{"type": "text2text", "instances": [{"input": ..., "output": ...}]}``.

    Args:
        input_file: Path of the NDJSON file to read (UTF-8, with or without
            a byte-order mark).
        output_file: Path of the JSON file to write (UTF-8).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    instances = []

    # Read line by line (NDJSON). "utf-8-sig" strips a leading BOM when one is
    # present and otherwise behaves like plain UTF-8, so the first record
    # still parses.
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines between records
            item = json.loads(line)

            question = item.get("question", "")
            subject = item.get("subject", "")
            choices = item.get("choices", [])
            output = _select_answer(choices, item.get("answer"))

            # Merge the fields into the single "input" string.
            input_text = f"question: {question}\nsubject: {subject}\nchoices: {choices}"

            instances.append({
                "input": input_text,
                "output": output
            })

    converted = {
        "type": "text2text",
        "instances": instances
    }

    # Save the result; ensure_ascii=False keeps non-ASCII text readable.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(converted, f, ensure_ascii=False, indent=2)

if __name__ == "__main__":
    # Script entry point: convert auxiliary_train.json (resolved relative to
    # the current working directory) and write the result to output.json.
    convert_dataset(
        input_file="auxiliary_train.json",
        output_file="output.json",
    )
